# R code for "Scientists' political behaviors are not driven by individual-level government benefits"
# 25 March 2020
# Baobao Zhang and Matto Mildenberger

rm(list=ls(all=TRUE))

# Packages
library(readxl)
library(dplyr)
library(readr)
library(gender)
library(survey)
library(pander)
library(ggplot2)
library(stargazer)
library(stringr)
library(ri2)
library(kableExtra)
library(knitr)

# Set file directory
data_folder <- ""

# Specialty functions

# Function to perform regression analysis
analysis_func <- function(outcome_var, treatment_var = "award",
                          weights_var = "ipw", covariates = NULL, df = my_c,
                          my_caption, pander = TRUE, raw_output = TRUE) {
  if (is.null(covariates)) {
    # Analysis without covariates
    results_temp <- lm_robust(as.formula(paste0(outcome_var, " ~ ", treatment_var)), 
                              data = df, weights = df[,weights_var])
  } else {
    # Analysis with covariates and covariates interacted with treatment 
    X <- scale(df[,covariates], center = TRUE, scale = FALSE)
    X <- data.frame(outcome = df[,outcome_var], 
                    treatment = df[,treatment_var], X, df[,treatment_var]*X)
    ipw_weights <- df[,weights_var]
    results_temp <- lm_robust(outcome ~ ., 
                              data = X, weights = ipw_weights)
  }
  # Output the results
  output_res <- data.frame(
    Coef = results_temp$coefficients[c(2, 1)], 
    SE = results_temp$std.error[c(2, 1)],
    p = results_temp$p.value[c(2, 1)])
  row.names(output_res) <- c("Effect of Award", "Intercept")
  if (pander) {
    pander(output_res, caption = my_caption, digits = 3) 
  }
  if (raw_output) {
    return(results_temp)
  }
}

# Function to look at the interaction effect by year
analysis_func_int <- function(outcome_var, treatment_var = "award", int_var_name = "year.x",
                              weights_var = "ipw", covariates = NULL, df = my_c,
                              my_caption, pander = TRUE, raw_output = TRUE) {
  if (is.null(covariates)) {
    results_temp <- lm_robust(as.formula(paste0(outcome_var, " ~ ", treatment_var)), 
                              data = df, weights = df[,weights_var])
  } else {
    X <- scale(df[,covariates], center = TRUE, scale = FALSE)
    int_var <- scale(df[,int_var_name], center = TRUE, scale = FALSE)
    X <- data.frame(outcome = df[,outcome_var], 
                    treatment = df[,treatment_var], 
                    int_var = int_var,
                    int_var_Z = df[,treatment_var] * int_var,
                    X, df[,treatment_var]*X)
    ipw_weights <- df[,weights_var]
    results_temp <- lm_robust(outcome ~ ., 
                              data = X, weights = ipw_weights)
  }
  output_res <- data.frame(
    Coef = results_temp$coefficients[4], 
    SE = results_temp$std.error[4],
    p = results_temp$p.value[4])
  return(output_res)
}

gen_results <- function(model_object, my_caption) {
  data.frame(caption = my_caption,
             est = model_object$coefficients[2],
             SE = model_object$std.error[2],
             p_value = model_object$p.value[2],
             baseline = model_object$coefficients[1],
             baseline_SE = model_object$std.error[1],
             N = model_object$N)
}

# Function to run the multiple regression analysis
linear_reg_func <- function(my_formula, my_data, coef_order, coef_names,
                            my_caption, weights = my_data$ipw) {
  m_Q <- lm_robust(my_formula, data = my_data, weights = weights)
  s_Q <- summary(m_Q)
  c_Q <- ifelse(s_Q$coefficients < 0.001 & s_Q$coefficients >= 0, 
                "< 0.001", sprintf("%.3f",round(s_Q$coefficients, 3)))
  t_Q <- data.frame(Estimates = paste0(c_Q[coef_order, 1], 
                                       " (", c_Q[coef_order, 2], ") "),
                    p_value = c_Q[coef_order, 4], stringsAsFactors = FALSE)
  t_Q <- rbind(t_Q, data.frame(Estimates = m_Q$N, p_value = ""))
  t_Q <- rbind(t_Q, data.frame(Estimates = paste0("F(", s_Q$fstatistic["numdf"], 
                                                  ", ", s_Q$fstatistic["dendf"], ")  = ",
                                                  sprintf("%.3f", round(s_Q$fstatistic["value"], 3))),
                               p_value = paste0("p-value = ", 
                                                sprintf("%.3f", round(1 - pf(s_Q$fstatistic[1], s_Q$fstatistic[2], s_Q$fstatistic[3]), 3)))))
  row.names(t_Q) <- coef_names
  return(stargazer(t_Q, summary = FALSE, title = my_caption))
}


###################################################
# Start of Analysis

# Load data
load(file = paste0(data_folder, "political_contribution_data.Rmd"))
###################################################
# Balance tests
# Regression analysis to check for balance in background characteristics
check_res <- rbind(
  gen_results(model_object = analysis_func(outcome_var = "ivy", 
                                           treatment_var = "award", 
                                           df = my_c, covariates = NULL,
                                           raw_output = TRUE, 
                                           my_caption = "Undergraduate Institute is an Ivy League"), 
              my_caption = "Undergraduate Institute is an Ivy League"),
  gen_results(model_object = analysis_func(outcome_var = "ivy_plus", 
                                           treatment_var = "award", 
                                           df = my_c, covariates = NULL,
                                           raw_output = TRUE, 
                                           my_caption = "Undergraduate Institute is an Ivy League Plus"), 
              my_caption = "Undergraduate Institute is an Ivy League Plus"),
  gen_results(model_object = analysis_func(outcome_var = "ivy_g", 
                                           treatment_var = "award", 
                                           df = my_c, covariates = NULL,
                                           raw_output = TRUE, 
                                           my_caption = "Graduate Institute is an Ivy League"), 
              my_caption = "Graduate Institute is an Ivy League"),
  gen_results(model_object = analysis_func(outcome_var = "ivy_plus_g", 
                                           treatment_var = "award", 
                                           df = my_c, covariates = NULL,
                                           raw_output = TRUE, 
                                           my_caption = "Graduate Institute is an Ivy League Plus"), 
              my_caption = "Graduate Institute is an Ivy League Plus"),
  gen_results(model_object = analysis_func(outcome_var = "pred_male", 
                                           treatment_var = "award", 
                                           df = my_c, covariates = NULL,
                                           raw_output = TRUE, 
                                           my_caption = "Applicant is Predicted Male"), 
              my_caption = "Applicant is Predicted Male")
)

# Check up the results for generating the figure
check_res$stars <- ""
check_res$stars[check_res$p_value < 0.05] <- "*"
check_res$stars[check_res$p_value < 0.01] <- "**"
check_res$stars[check_res$p_value < 0.001] <- "***"
check_res$text <- paste0(sprintf("%.3f", round(check_res$est, 3)), 
                         " (", sprintf("%.3f", round(check_res$SE, 3)), ")",
                         check_res$stars, ";\nBaseline = ", 
                         sprintf("%.3f", round(check_res$baseline, 3)), " (", 
                         sprintf("%.3f", round(check_res$baseline_SE, 3)), ")")
check_res$caption <- factor(check_res$caption, levels = rev(check_res$caption))

# Fig 1. Comparing differences in background characteristics between winners and non-winners
ggplot(data = check_res, aes(x = caption, y = est, ymin = qnorm(0.025)*SE + est,
                             ymax = qnorm(0.975)*SE + est)) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 25),
                   name = "Outcomes") + 
  ylab("Estimates (Differences in Proportions)") + geom_text(aes(x = caption, y = est, label = text), 
                                                             nudge_x = -0.5) + 
  geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) + 
  geom_pointrange() + coord_flip() + theme_bw() + expand_limits(x = -0.25)

# Check for balance between contact information between winners and non-winners 

# Covariates used for PAP  
pap_covariates <- c("ivy", "ivy_plus", "ivy_g", "ivy_plus_g", "pred_male")

# Output results
email_output <- rbind(
  gen_results(model_object = analysis_func(outcome_var = "missing_email", 
                                           covariates = pap_covariates, 
                                           my_caption = "Missing Email Address", raw_output = TRUE),
              my_caption = "Missing Email Address"),
  gen_results(analysis_func(outcome_var = "missing_all", covariates = pap_covariates, 
                            my_caption = "Missing All Contact Information", raw_output = TRUE),
              my_caption = "Missing All Contact Information"),
  gen_results(analysis_func(outcome_var = "edu_email", covariates = pap_covariates, 
                            my_caption = "Has .edu Email Address", raw_output = TRUE), 
              my_caption = "Has .edu Email Address"),
  gen_results(analysis_func(outcome_var = "org_email", covariates = pap_covariates, 
                            my_caption = "Has .org Email Address", raw_output = TRUE),
              my_caption = "Has .org Email Address"),
  gen_results(analysis_func(outcome_var = "gov_email", covariates = pap_covariates, 
                            my_caption = "Has .gov Email Address", raw_output = TRUE),
              my_caption = "Has .gov Email Address"),
  gen_results(analysis_func(outcome_var = "com_email", covariates = pap_covariates, 
                            my_caption = "Has .com Email Address (Excluding Personal Emails)",
                            raw_output = TRUE),  
              my_caption = "Has .com Email Address (Excluding Personal Emails)"),
  gen_results(analysis_func(outcome_var = "personal_email", covariates = pap_covariates, 
                            my_caption = "Has Personal Email Address Only", raw_output = TRUE),
              my_caption = "Has Personal Email Address Only")  
)

# Prepare the data for data viz
email_output$stars <- ""
email_output$stars[email_output$p_value < 0.05] <- "*"
email_output$stars[email_output$p_value < 0.01] <- "**"
email_output$stars[email_output$p_value < 0.001] <- "***"
email_output$text <- paste0(sprintf("%.3f", round(email_output$est, 3)), 
                            " (", sprintf("%.3f", round(email_output$SE, 3)), ")",
                            email_output$stars, ";\nBaseline = ", 
                            sprintf("%.3f", round(email_output$baseline, 3)), " (", 
                            sprintf("%.3f", round(email_output$baseline_SE, 3)), ")")
email_output$caption <- factor(email_output$caption, levels = rev(email_output$caption))

# Fig. 2 Comparing differences in contact information between winners and non-winners
ggplot(data = email_output, aes(x = caption, y = est, ymin = qnorm(0.025)*SE + est,
                                ymax = qnorm(0.975)*SE + est)) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 25),
                   name = "Outcomes") + 
  ylab("Estimates (Differences in Proportions)") + 
  geom_text(aes(x = caption, y = est, label = text), nudge_x = -0.5) + 
  geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) + 
  geom_pointrange() + coord_flip() + theme_bw() +
  expand_limits(x = -0.5)

# Function to perform the naive balance tests
native_balance <- function(caption, outcome_var) {
  lm_res <- lm_robust(formula = as.formula(paste0(outcome_var, " ~ award")), data = my_c) 
  return(data.frame(caption, estimate = lm_res$coefficients[2], 
                    se = lm_res$std.error[2], p_value = lm_res$p.value[2], baseline = lm_res$coefficients[1],
                    n = nrow(my_c)))
}

# Perform the naive balance tests
naive_balance <- rbind(native_balance(caption = "Undergraduate Institute is an Ivy League", outcome_var = "ivy"),
                       native_balance(caption = "Undergraduate Institute is an Ivy League Plus", outcome_var = "ivy_plus"),
                       native_balance(caption = "Graduate Institute is an Ivy League", outcome_var = "ivy_g"),
                       native_balance(caption = "Graduate Institute is an Ivy League Plus", outcome_var = "ivy_plus_g"),
                       native_balance(caption = "Applicant is Predicted Male", outcome_var = "pred_male")
)

# Clean up the results for making the tables
naive_balance$est_text <- paste0(round(naive_balance$estimate, 3), " (",
                                 round(naive_balance$se, 3), ")")
# S1 Tab 9. Naive balance test: simple difference-in-means between winners and non-winners
stargazer(naive_balance[,c("caption", "est_text", "baseline", "p_value")],
          summary = FALSE, rownames = FALSE,
          title = "Naive Balance Test: Simple Difference-in-Means Between Winners and Non-winners", 
          covariate.labels = c("Outcome", "Estimate (SE)", "Baseline", "p-value"))

###################################################
# Political donation results

# Regression analysis
# Number of donations
num_donate <- rbind(gen_results(analysis_func(outcome_var = "donate_num", covariates = pap_covariates,
                                              my_caption = "Number of Donations", df = my_c, raw_output = TRUE),
                                my_caption = "Number of Donations"),
                    gen_results(analysis_func(outcome_var = "donate_num_D", covariates = pap_covariates,
                                              my_caption = "Number of Donations to Democrats", 
                                              df = my_c, raw_output = TRUE),
                                my_caption = "Number of Donations to Democrats"),
                    gen_results(analysis_func(outcome_var = "donate_num_R", covariates = pap_covariates,
                                              my_caption = "Number of Donations to Republicans", 
                                              df = my_c, raw_output = TRUE), 
                                my_caption = "Number of Donations to Republicans"))
num_donate$group <- "Number of Donations"
# Donations in USD
amount_donate <- rbind(gen_results(analysis_func(outcome_var = "donate_amount_total", 
                                                 covariates = pap_covariates,
                                                 my_caption = "Donations (in USD)", df = my_c, raw_output = TRUE),
                                   my_caption = "Amount Donated"),
                       gen_results(analysis_func(outcome_var = "donate_amount_D", covariates = pap_covariates,
                                                 my_caption = "Amount Donated to Democrats", df = my_c, raw_output = TRUE),
                                   my_caption = "Amount Donated to Democrats"),
                       gen_results(analysis_func(outcome_var = "donate_amount_R", covariates = pap_covariates, 
                                                 my_caption = "Donations to Republicans (in USD)",
                                                 df = my_c, raw_output = TRUE), my_caption = "Amount Donated to Republicans"))
amount_donate$group <- "Amount Donated in USD"

# Combine the results
donate_res <- rbind(num_donate, amount_donate)

# Clean up the results for the data visualization
donate_res$stars <- ""
donate_res$stars[donate_res$p_value < 0.05] <- "*"
donate_res$stars[donate_res$p_value < 0.01] <- "**"
donate_res$stars[donate_res$p_value < 0.001] <- "***"
donate_res$caption <- factor(donate_res$caption, levels = rev(donate_res$caption))
donate_res$text_y <- donate_res$est

# Fig 4. Effect of being awarded the NSF Graduate Research Fellowship on Political Donations
ggplot(data = donate_res, aes(x = caption, y = est, ymin = qnorm(0.025)*SE + est,
                              ymax = qnorm(0.975)*SE + est)) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 25),
                   name = "Outcomes") + 
  ylab("Estimated Effects") + 
  geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) + 
  geom_pointrange() + facet_wrap(~group, scales = "free", ncol = 2) +
  coord_flip() + theme_bw() 

# Clean up results for table
donate_res$est <- paste0(round(donate_res$est, 2), " (", round(donate_res$SE, 2), ")")
donate_res$pval <- round(donate_res$p_value, 3)
donate_res$base <- round(donate_res$baseline, 2)
# S1 Tab 3. Effect of being awarded the NSF Graduate REsearch Fellowship on political donations; including background covariates
stargazer(donate_res[c(4:6, 1:3),c("caption", "est", "base", "pval")], digits = NA,
          summary = FALSE, 
          rownames = FALSE)

# Regression analysis: only controlling for predicted gender
# Number of donations
num_donate_pred_male <- rbind(gen_results(analysis_func(outcome_var = "donate_num", covariates = "pred_male",
                                                        my_caption = "Number of Donations", df = my_c, raw_output = TRUE),
                                          my_caption = "Number of Donations"),
                              gen_results(analysis_func(outcome_var = "donate_num_D", covariates = "pred_male",
                                                        my_caption = "Number of Donations to Democrats", 
                                                        df = my_c, raw_output = TRUE),
                                          my_caption = "Number of Donations to Democrats"),
                              gen_results(analysis_func(outcome_var = "donate_num_R", covariates = "pred_male",
                                                        my_caption = "Number of Donations to Republicans", 
                                                        df = my_c, raw_output = TRUE), 
                                          my_caption = "Number of Donations to Republicans"))
num_donate_pred_male$group <- "Number of Donations"
# Donations in USD
amount_donate_pred_male <- rbind(gen_results(analysis_func(outcome_var = "donate_amount_total", 
                                                           covariates = "pred_male",
                                                           my_caption = "Donations (in USD)", df = my_c, raw_output = TRUE),
                                             my_caption = "Amount Donated"),
                                 gen_results(analysis_func(outcome_var = "donate_amount_D", covariates = "pred_male",
                                                           my_caption = "Amount Donated to Democrats", df = my_c, raw_output = TRUE),
                                             my_caption = "Amount Donated to Democrats"),
                                 gen_results(analysis_func(outcome_var = "donate_amount_R", covariates = "pred_male", 
                                                           my_caption = "Donations to Republicans (in USD)",
                                                           df = my_c, raw_output = TRUE), my_caption = "Amount Donated to Republicans"))
amount_donate_pred_male$group <- "Amount Donated in USD"
donate_res_pred_male <- rbind(num_donate_pred_male, amount_donate_pred_male)
# Prepare the data for data viz
donate_res_pred_male$stars <- ""
donate_res_pred_male$stars[donate_res$p_value < 0.05] <- "*"
donate_res_pred_male$stars[donate_res$p_value < 0.01] <- "**"
donate_res_pred_male$stars[donate_res$p_value < 0.001] <- "***"
donate_res_pred_male$caption <- factor(donate_res$caption, levels = rev(donate_res$caption))
donate_res_pred_male$text_y <- donate_res$est
# Make the table
donate_res_pred_male$est <- paste0(round(donate_res_pred_male$est, 2), " (", round(donate_res_pred_male$SE, 2), ")")
donate_res_pred_male$pval <- round(donate_res_pred_male$p_value, 3)
donate_res_pred_male$base <- round(donate_res_pred_male$baseline, 2)
# Make the table
stargazer(donate_res_pred_male[c(4:6, 1:3),c("caption", "est", "base", "pval")], digits = NA,
          summary = FALSE, covariate.labels = c("Outcome", "Estimate (SE)", "Baseline", "p-value"),
          rownames = FALSE)

# Regression analysis: without covariates
# Number of donations
num_donate_nc <- rbind(gen_results(analysis_func(outcome_var = "donate_num", covariates = NULL,
                                                 my_caption = "Number of Donations", df = my_c, raw_output = TRUE),
                                   my_caption = "Number of Donations"),
                       gen_results(analysis_func(outcome_var = "donate_num_D", covariates = NULL,
                                                 my_caption = "Number of Donations to Democrats", 
                                                 df = my_c, raw_output = TRUE),
                                   my_caption = "Number of Donations to Democrats"),
                       gen_results(analysis_func(outcome_var = "donate_num_R", covariates = NULL,
                                                 my_caption = "Number of Donations to Republicans", 
                                                 df = my_c, raw_output = TRUE), 
                                   my_caption = "Number of Donations to Republicans"))
num_donate_nc$group <- "Number of Donations"
# Donations in USD
amount_donate_nc <- rbind(gen_results(analysis_func(outcome_var = "donate_amount_total", 
                                                    covariates = NULL,
                                                    my_caption = "Donations (in USD)", df = my_c, raw_output = TRUE),
                                      my_caption = "Amount Donated"),
                          gen_results(analysis_func(outcome_var = "donate_amount_D", covariates = NULL,
                                                    my_caption = "Amount Donated to Democrats", df = my_c, raw_output = TRUE),
                                      my_caption = "Amount Donated to Democrats"),
                          gen_results(analysis_func(outcome_var = "donate_amount_R", covariates = NULL, 
                                                    my_caption = "Donations to Republicans (in USD)",
                                                    df = my_c, raw_output = TRUE), my_caption = "Amount Donated to Republicans"))
amount_donate_nc$group <- "Amount Donated in USD"
donate_res_nc <- rbind(num_donate_nc, amount_donate_nc)
# Prepare the data for data viz
donate_res_nc$stars <- ""
donate_res_nc$stars[donate_res$p_value < 0.05] <- "*"
donate_res_nc$stars[donate_res$p_value < 0.01] <- "**"
donate_res_nc$stars[donate_res$p_value < 0.001] <- "***"
donate_res_nc$caption <- factor(donate_res$caption, levels = rev(donate_res$caption))
donate_res_nc$text_y <- donate_res$est
# Make the table
donate_res_nc$est <- paste0(round(donate_res_nc$est, 2), " (", round(donate_res_nc$SE, 2), ")")
donate_res_nc$pval <- round(donate_res_nc$p_value, 3)
donate_res_nc$base <- round(donate_res_nc$baseline, 2)
# S1 Tab 4. Effect of being award the NSF Graduate Research Fellowship on political donations; not including background covariates
stargazer(donate_res_nc[c(4:6, 1:3),c("caption", "est", "base", "pval")], digits = NA,
          summary = FALSE, 
          rownames = FALSE)

# Interaction effect: treatment interacted with decision year
# Note: This analysis will not be possible with the publicly available data. 
# To protect subjects' privacy, we have anonymized the application year variable.
# Please contact us and we can run the analysis for you.
# donation_outcomes <- c("donate_amount_total", "donate_amount_D", "donate_amount_R",
#                        "donate_num", "donate_num_D", "donate_num_R")
# donation_int <- do.call(rbind, lapply(X = donation_outcomes, analysis_func_int, covariates = pap_covariates))
# donation_int$caption <- c("Amount Donated", "Amount Donated to Democrats", "Amount Donated to Republicans",
#                           "Number of Donations", "Number of Donations to Democrats", "Number of Donations to Republicans")
# donation_int$est_text <- paste0(round(donation_int$Coef, 3), 
#                                 " (", round(donation_int$SE, 3), ")")
# S1 Tab 12: interaction effects of award and year of application on political donation 
# stargazer(donation_int[,c("caption", "est_text", "p")], digits = 3,
#           summary = FALSE, covariate.labels = c("Interaction Effect Estimate (SE)", "p-value"),
#           rownames = FALSE, title = "Interaction Effect of Award and Year of Application on Political Donations")


# Robustness check: randomization inference

# Delcare the design of the study
my_c$Z <- my_c$award # assign the treatment variable as Z
block_m <- with(my_c, tapply(award, block, sum))
declaration <- 
  with(my_c,{
    declare_ra(
      blocks = block,
      block_m = block_m)
  })

# Demean the covariates
my_c$ivy_c <- my_c$ivy - mean(my_c$ivy)
my_c$ivy_plus_c <- my_c$ivy_plus - mean(my_c$ivy_plus)
my_c$ivy_g_c <- my_c$ivy_g - mean(my_c$ivy_g)
my_c$ivy_plus_g_c <- my_c$ivy_plus_g - mean(my_c$ivy_plus_g)
my_c$pred_male_c <- my_c$pred_male - mean(my_c$pred_male)

# Create a vector of outcome variable names
donation_outcomes <- c("donate_num", "donate_num_D", "donuate_num_R", 
                       "donate_amount_total", "donate_amount_D", "donate_amount_R")

# Function to run the Wilcox
ri_function_wilcox <- function(outcome, sims = 1000, mydata = my_c, save_sims = FALSE) {
  # First, we control for the covariates by regressing the outcome on the covariates and getting out the residuals
  ri_formula <- paste0(outcome, " ~ ivy_c + ivy_plus_c + ivy_g_c + ivy_plus_g_c + pred_male_c")
  res_lm <- lm(as.formula(ri_formula), data = mydata)
  mydata$Y <- res_lm$residuals
  # Create the function to perform the Wilcoxon Rank Sum test 
  test_fun <- function(data = mydata) {
    wt <- wilcox.test(data[,"Y"] ~ data[,"Z"], alternative = "two.sided")
    wt$statistic
  }
  # Function to run the permutation test 
  ri2_out <- conduct_ri(
    test_function = test_fun,
    declaration = declaration,
    assignment = "Z",
    outcome = "Y",
    sharp_hypothesis = 0,
    data = mydata, progress_bar = TRUE
  )
  # Output the simulations
  if (save_sims) {
    write.csv(ri2_out$sims_df, 
              file = paste0(output_folder, outcome, "_ri_w_sims.csv"),
              append = FALSE)
  }
  return(data.frame(outcome, summary(ri2_out)))
}

# Run the permutation tests (this takes a while)
set.seed(318)
donate_amount_total_ri_w <- ri_function_wilcox(outcome = "donate_amount_total")
donate_amount_D_ri_w <- ri_function_wilcox(outcome = "donate_amount_D")
donate_amount_R_ri_w <- ri_function_wilcox(outcome = "donate_amount_R")

set.seed(318)
donate_amount_total_ri_w <- ri_function_wilcox_raw(outcome = "donate_amount_total")
donate_amount_D_ri_w <- ri_function_wilcox(outcome = "donate_amount_D")
donate_amount_R_ri_w <- ri_function_wilcox(outcome = "donate_amount_R")

# Combine the results
wfi <- rbind(donate_amount_total_ri_w, donate_amount_D_ri_w, donate_amount_R_ri_w)
# Clean up the results
ri_w_outcome <- data.frame(outcome = c("Amount Donated", "Amount Donated to Democrats", 
                                       "Amount Donated to Republicans"),
                           estimate = wfi$estimate,
                           p_value = wfi$two_tailed_p_value)
# S1 Tab 10: Effect of being awarded the NSF Graduate Research Fellowship on political donation amounts: randomization inference results
stargazer(ri_w_outcome, summary = FALSE, rownames = FALSE, 
          covariate.labels = c("Outcome", "Wilcoxon rank statistic", "Two-sided p-value"),
          title = "Effect of Being Awarded the NSF Graduate Research Fellowship on Political Donation Amounts: Randomization Inference Results")

# Robustness test by removing the top 1-4 donors

# Top 1-4 Donors
my_c$donate_amount_total_rank <- rank(my_c$donate_amount_total)
my_c_e1 <- my_c
my_c_e1$donate_amount_total[my_c_e1$donate_amount_total_rank == 2119] <- 0
my_c_e2 <- my_c
my_c_e2$donate_amount_total[my_c_e2$donate_amount_total_rank %in% 2118:2119] <- 0
my_c_e3 <- my_c
my_c_e3$donate_amount_total[my_c_e3$donate_amount_total_rank %in% 2117:2119] <- 0
my_c_e4 <- my_c
my_c_e4$donate_amount_total[my_c_e4$donate_amount_total_rank %in% 2116:2119] <- 0
# Analysis: donation total amount
r_top4 <- rbind(gen_results(analysis_func(outcome_var = "donate_amount_total", 
                                          covariates = pap_covariates, 
                                          my_caption = "Donations (in USD)", 
                                          df = my_c_e1, 
                                          raw_output = TRUE),
                            my_caption = "Top 1 Donor Set to $0"),
                gen_results(analysis_func(outcome_var = "donate_amount_total", 
                                          covariates = pap_covariates, 
                                          my_caption = "Donations (in USD)", 
                                          df = my_c_e2, 
                                          raw_output = TRUE),
                            my_caption = "Top 1-2 Donors Set to $0"),
                gen_results(analysis_func(outcome_var = "donate_amount_total", 
                                          covariates = pap_covariates, 
                                          my_caption = "Donations (in USD)", 
                                          df = my_c_e3, 
                                          raw_output = TRUE),
                            my_caption = "Top 1-3 Donors Set to $0"),
                gen_results(analysis_func(outcome_var = "donate_amount_total", 
                                          covariates = pap_covariates,
                                          my_caption = "Donations (in USD)", 
                                          df = my_c_e4, 
                                          raw_output = TRUE),
                            my_caption = "Top 1-4 Donors Set to $0"))
r_top4$group <- "Amount Donated (in USD)"
# Top 4 Democratic donors
my_c$donate_amount_D_rank_D <- rank(my_c$donate_amount_D)
my_c_e1 <- my_c
my_c_e1$donate_amount_D[my_c_e1$donate_amount_D_rank_D == 2119] <- 0
my_c_e2 <- my_c
my_c_e2$donate_amount_D[my_c_e2$donate_amount_D_rank_D %in% 2118:2119] <- 0
my_c_e3 <- my_c
my_c_e3$donate_amount_D[my_c_e3$donate_amount_D_rank_D %in% 2117:2119] <- 0
my_c_e4 <- my_c
my_c_e4$donate_amount_D[my_c_e4$donate_amount_D_rank_D %in% 2116:2119] <- 0
# Run the analysis: donation amount to Democrats
r_top4_D <- rbind(gen_results(analysis_func(outcome_var = "donate_amount_D", 
                                            covariates = pap_covariates, 
                                            my_caption = "Donations (in USD)", 
                                            df = my_c_e1, 
                                            raw_output = TRUE),
                              my_caption = "Top 1 Democratic Donor Set to $0"),
                  gen_results(analysis_func(outcome_var = "donate_amount_D", 
                                            covariates = pap_covariates, 
                                            my_caption = "Donations (in USD)", 
                                            df = my_c_e2, 
                                            raw_output = TRUE),
                              my_caption = "Top 1-2 Democratic Donors Set to $0"),
                  gen_results(analysis_func(outcome_var = "donate_amount_D", 
                                            covariates = pap_covariates, 
                                            my_caption = "Donations (in USD)", 
                                            df = my_c_e3, 
                                            raw_output = TRUE),
                              my_caption = "Top 1-3 Democratic Donors Set to $0"),
                  gen_results(analysis_func(outcome_var = "donate_amount_D", 
                                            covariates = pap_covariates, 
                                            my_caption = "Donations (in USD)",
                                            df = my_c_e4, 
                                            raw_output = TRUE),
                              my_caption = "Top 1-4 Democratic Donors Set to $0"))
r_top4_D$group <- "Amount Donated to Democrats (in USD)"
# Top 4 Republican Donors
my_c$donate_amount_R_rank_R <- rank(my_c$donate_amount_R)
my_c_e1 <- my_c
my_c_e1$donate_amount_R[my_c_e1$donate_amount_R_rank_R == 2119] <- 0
my_c_e2 <- my_c
my_c_e2$donate_amount_R[my_c_e2$donate_amount_R_rank_R %in% 2118:2119] <- 0
my_c_e3 <- my_c
my_c_e3$donate_amount_R[my_c_e3$donate_amount_R_rank_R %in% 2117:2119] <- 0
my_c_e4 <- my_c
my_c_e4$donate_amount_R[my_c_e4$donate_amount_R_rank_R %in% 2116:2119] <- 0
# Run the analysis: donation amount to Republicans
r_top4_R <- rbind(gen_results(analysis_func(outcome_var = "donate_amount_R", 
                                            covariates = pap_covariates, 
                                            my_caption = "Donations (in USD)", 
                                            df = my_c_e1, 
                                            raw_output = TRUE),
                              my_caption = "Top 1 Republican Donor Set to $0"),
                  gen_results(analysis_func(outcome_var = "donate_amount_R", 
                                            covariates = pap_covariates, 
                                            my_caption = "Donations (in USD)",
                                            df = my_c_e2, 
                                            raw_output = TRUE),
                              my_caption = "Top 1-2 Republican Donors Set to $0"),
                  gen_results(analysis_func(outcome_var = "donate_amount_R", 
                                            covariates = pap_covariates, 
                                            my_caption = "Donations (in USD)", 
                                            df = my_c_e3, 
                                            raw_output = TRUE),
                              my_caption = "Top 1-3 Republican Donors Set to $0"),
                  gen_results(analysis_func(outcome_var = "donate_amount_R", 
                                            covariates = pap_covariates,
                                            my_caption = "Donations (in USD)",
                                            df = my_c_e4, 
                                            raw_output = TRUE),
                              my_caption = "Top 1-4 Republican Donors Set to $0"))
r_top4_R$group <- "Amount Donated to Republicans (in USD)"

# Combine the outputs
r_top4 <- rbind(r_top4, r_top4_D, r_top4_R)

# Clean up the output results for creating table
r_top4$stars <- ""
r_top4$stars[r_top4$p_value < 0.05] <- "*"
r_top4$stars[r_top4$p_value < 0.01] <- "**"
r_top4$stars[r_top4$p_value < 0.001] <- "***"
r_top4$text <- paste0(sprintf("%.2f", round(r_top4$est, 2)), 
                      " (", sprintf("%.2f", round(r_top4$SE, 2)), ")",
                      r_top4$stars, ";\nBaseline = ", 
                      sprintf("%.2f", round(r_top4$baseline, 2)), " (", 
                      sprintf("%.2f", round(r_top4$baseline_SE, 2)), ")")
r_top4$caption <- factor(r_top4$caption, levels = rev(r_top4$caption))
# S1 Fig 1. Robustness check: effect of being awarded the NSF Graduate Research Fellowship on political donations by setting top donations to $0
ggplot(data = r_top4, aes(x = caption, y = est, ymin = qnorm(0.025)*SE + est,
                          ymax = qnorm(0.975)*SE + est)) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 25),
                   name = "Changes to the Dependent Variable") + 
  ylab("Estimated Effects") + geom_text(aes(x = caption, y = est, label = text), 
                                        nudge_x = -0.5) + 
  geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) + 
  geom_pointrange() + facet_wrap(~group, scales = "free", nrow = 3) +
  coord_flip(xlim = c(0.25, 3.75)) + theme_bw()


######################################################################
# Comparing survey respondents versus non-respondents
# Note: This analysis will not be possible with the publicly available data. 
# To protect subjects' privacy, we have anonymized the year applied and field of study variables.
# Please contact us and we can run the analysis for you.

# # Create a vector of the background characteristics
# char_vars <- c("Award Winner",
#                "Undergraduate Institute Is an Ivy League",
#                "Undergraduate Institute Is an Ivy League Plus",
#                "Graduate Institute Is an Ivy League",
#                "Graduate Institute Is an Ivy League Plus",
#                "Applicant is Male", "Year Applied",
#                "Field: Computer Science", "Field: Engineering",
#                "Field: Geosciences", "Field: Life Sciences",
#                "Field: Material Sciences", "Field: Math",
#                "Field: Physics and Astronomy", "Field: Psychology", "Field: Social Sciences")
# 
# # Run the multiple regression
# # S1 Tab 6. Predicting survey response use applicants' characteristics
# linear_reg_func(my_formula = as.formula("took_survey ~ award + ivy + ivy_plus +
#                        ivy_g + ivy_plus_g + pred_male + year.x +
#                                         field.x"),
#                 my_data = my_m, 
#                 weights = rep(1, nrow(my_m)),
#                 coef_order = c(2:17, 1),
#                 coef_names = c(char_vars, "Intercept", "N", "F-statistic"),
#                 my_caption = "Predicting Survey Response Using Applicants' Characteristics")
# 
# # Get the means for those who took the survey versus those who did not
# diff_table <- my_m %>% group_by(took_survey) %>% dplyr::summarise(
#   award = mean(award),
#   ivy = mean(ivy),
#   ivy_plus = mean(ivy_plus),
#   ivy_g = mean(ivy_g),
#   ivy_plus_g = mean(ivy_plus_g),
#   pred_male = mean(pred_male),
#   year = mean(year.x),
#   chemistry = mean(field.x == "chemistry"),
#   computer = mean(field.x == "computer"),
#   engineering = mean(field.x == "engineering"),
#   geoscieces = mean(field.x == "geosciences"),
#   life_sciences = mean(field.x == "life sciences"),
#   materials = mean(field.x == "materials"),
#   math = mean(field.x == "math"),
#   physics_astronomy = mean(field.x == "physics/astronomy"),
#   psychology = mean(field.x == "psychology"),
#   social_sciences = mean(field.x == "social sciences")
# ) %>% t()
# diff_table <- diff_table[-1,]
# 
# # Estimate the differences
# diff_stat <- rbind(
#   summary(lm(award ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(ivy ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(ivy_plus ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(ivy_g ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(ivy_plus_g ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(pred_male ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(year.x ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(field.x == "chemistry" ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(field.x == "computer" ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(field.x == "engineering" ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(field.x == "geosciences" ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(field.x == "life sciences" ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(field.x == "materials" ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(field.x == "math" ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(field.x == "physics/astronomy" ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(field.x == "psychology" ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)],
#   summary(lm(field.x == "social sciences" ~ took_survey, data = my_m), robust = TRUE)$coefficient[2,c(2,4)]
# )
# # Clean up the results
# diff_stars <- rep("", nrow(diff_stat))
# diff_stars[diff_stat[,2] < 0.05] <- "*"
# diff_stars[diff_stat[,2] < 0.01] <- "**"
# diff_stars[diff_stat[,2] < 0.001] <- "***"
# diff_stat <- paste0(sprintf("%.3f", round(diff_table[,1] - diff_table[,2], 3)),
#                     " (", sprintf("%.3f", round(diff_stat[,1], 3)), ")", diff_stars)
# diff_table <- data.frame(sprintf("%.3f", round(diff_table[,1], 3)),
#                          sprintf("%.3f", round(diff_table[,2], 3)),
#                          diff_stat)
# colnames(diff_table) <- c("Did Not Take Surve", "Took Survey", "Difference")
# # Add in the "Field: Chemistry" label because that was in the reference group in the multiple regression
# row.names(diff_table) <- c(char_vars[1:7], "Field: Chemistry", char_vars[8:16])
# # S1 Tab 5. Summary statistics of respondents and non-respondents
# stargazer(diff_table, summary = FALSE, title = "Summary Statistics of Respondents and Non-respondents")


######################################################################
# Survey analysis

######################################################################

# Load data
load(file = paste0(data_folder, "survey_data.Rmd"))

# Balance tests for the survey respondents

# Function to univariate balance tests
single_balance <- function(my_var, my_data = qs) {
  md_sum <- summary(lm_robust(as.formula(paste0(my_var, " ~ award")), data = my_data,
                              weights = my_data$ipw))$coefficients
  temp <- data.frame(NoAward = md_sum[1,1], Award = (md_sum[1,1] + md_sum[2,1]),
                     Estimate = md_sum[2,1], SE = md_sum[2,2], p_value = md_sum[2,4])
  temp$stars <- ""
  temp$stars[temp$p_value < 0.05] <- "*"
  temp$stars[temp$p_value < 0.01] <- "**"
  temp$stars[temp$p_value < 0.001] <- "***"
  return(data.frame(NoAward = sprintf("%.3f", round(temp$NoAward, 3)),
                    Award = sprintf("%.3f", round(temp$Award, 3)),
                    Diff = paste0(ifelse(abs(temp$Estimate) < 0.001, 
                                         paste0("<", ifelse(temp$Estimate < 0, "-", ""), "0.001"),
                                         sprintf("%.3f", round(temp$Estimate, 3))), 
                                  " (",
                                  ifelse(abs(temp$SE) < 0.001, 
                                         paste0("<", ifelse(temp$SE < 0, "-", "") ,"0.001"),
                                         sprintf("%.3f", round(temp$SE, 3))), ")",
                                  temp$stars
                    )))
}

# Background covariates
qs_cov <- c("ivy", "ivy_plus", "ivy_g", "ivy_plus_g", "pred_male",
            "race_m", "race_a", "usborn")
balance_table_qs <- do.call(rbind, lapply(qs_cov, single_balance))
char_vars_2 <- c(char_vars[2:6], "Self-reported Under-represented Minority (Black, Hispanic, and/or Native American)", "Self-reported Asian",
                 "Self-reported Born in the US")
rownames(balance_table_qs) <- char_vars_2
colnames(balance_table_qs) <- c("No Award", "Award", "Difference")
# S1 Tab 7. Summary statistics of the survey sample
stargazer(balance_table_qs, summary = FALSE, 
          title = "Summary Statistics of Survey Respondents")

# Regression: predicting who gets an award
# S1 Tab 8. Predicting Award Using Survey Sample's Respondent Characteristics
linear_reg_func(my_formula = as.formula("award ~ ivy + ivy_plus + 
                       ivy_g + ivy_plus_g + pred_male + race_m + race_a + usborn"), 
                my_data = qs, 
                coef_order = c(2:9, 1), 
                coef_names = c(char_vars_2, "Intercept", "N", "F-statistic"),
                my_caption = "Predicting Award Using Respondents' Characteristics")

######################################################################
# Effects on political attitudes and behavior

# Run the regression analysis
# Party ID
party_id <- rbind(gen_results(analysis_func(outcome_var = "party_d", covariates = qs_cov,
                                            my_caption = "Democrat (Includes Lean)", df = qs),
                              my_caption = "Democrat (Includes Lean)"),
                  gen_results(analysis_func(outcome_var = "party_r", covariates = qs_cov,
                                            my_caption = "Republican (Includes Lean)", df = qs),
                              my_caption = "Republican (Includes Lean)"),
                  gen_results(model_object = analysis_func(outcome_var = "ideo", covariates = qs_cov,
                                                           my_caption = "Political Ideology (1=Very Con., 5= Very Lib.)", 
                                                           df = qs),
                              my_caption = "Political Ideology (1=Very Con., 5= Very Lib.)"))
party_id$group <- "Political Identity"

# In a group
in_group <- rbind(
  gen_results(analysis_func(outcome_var = "group_b", covariates = qs_cov,
                            my_caption = "In a Scientific Organization", 
                            df = qs), "In a Scientific Organization"),
  gen_results(analysis_func(outcome_var = "group_p", covariates = qs_cov,
                            my_caption = "In the Union of Concerned Scientists", 
                            df = qs), "In the Union of Concerned Scientists")
)
in_group$group <- "Group Membership"

# March for Science
science_march <- rbind(
  gen_results(analysis_func(outcome_var = "march1", covariates = qs_cov,
                            my_caption = "Support for March for Science", df = qs),
              "Support for March for Science (1 = Strongly Oppose, 5 = Strongly Support)"),
  gen_results(analysis_func(outcome_var = "march_p", covariates = qs_cov,
                            my_caption = "Participated in March for Science", df = qs),
              "Participated in March for Science")
)
science_march$group = "March for Science"

# Communication
communicate <- rbind(
  gen_results(analysis_func(outcome_var = "comm1", covariates = qs_cov,
                            my_caption = "Communicate with Policy Makers (1 = Never; 4 = Often)", 
                            df = qs), "Communicate with Policy Makers (1 = Never; 4 = Often)"),
  gen_results(analysis_func(outcome_var = "comm2", covariates = qs_cov,
                            my_caption = "Communicate with Reporters (1 = Never; 4 = Often)", df = qs),
              "Communicate with Reporters (1 = Never; 4 = Often)")
)
communicate$group <- "Communicate Research Findings"

# Federal funding
fed_funding <- rbind(
  gen_results(analysis_func(outcome_var = "fedfund", covariates = qs_cov,  
                            my_caption = "Federal Funding for Science", df = qs),
              "Federal Funding for Science (1 = Decrease; 3 = Increase)"),
  gen_results(analysis_func(outcome_var = "nsf1", covariates = qs_cov, 
                            my_caption = "Funding for NSF (1 = Decrease; 3 = Increase)", df = qs),
              "Funding for NSF (1 = Decrease; 3 = Increase)"),
  gen_results(analysis_func(outcome_var = "nsf2", covariates = qs_cov,
                            my_caption = "Funding for NSF GRFP (1 = Decrease; 3 = Increase)", df = qs),
              "Funding for NSF GRFP (1 = Decrease; 3 = Increase)")
)

fed_funding$group <- "Attitudes Towards Government Funding for Science"

# Donation
donate_game <- rbind(gen_results(analysis_func(outcome_var = "donation", covariates = qs_cov,
                                               my_caption = "Donation Dollar Amount (in USD)", df = qs),
                                 my_caption = "Donation Dollar Amount"),
                     gen_results(analysis_func(outcome_var = "donation_p_n", covariates = qs_cov,
                                               my_caption = "Amount Donated to Organizations with Political Agenda (in USD)",
                                               df = qs), my_caption = "Amount Donated to Organizations with Political Agenda (in USD)"))
donate_game$group <- "Donation Behavorial Measure"

# Combine the results
qs_res <- rbind(science_march, fed_funding, communicate, donate_game, party_id)

# Clean up the results for data visualization
qs_res$stars <- ""
qs_res$stars[qs_res$p_value < 0.05] <- "*"
qs_res$stars[qs_res$p_value < 0.01] <- "**"
qs_res$stars[qs_res$p_value < 0.001] <- "***"
qs_res$text <- paste0(sprintf("%.3f", round(qs_res$est, 3)), 
                      " (", sprintf("%.3f", round(qs_res$SE, 3)), ")",
                      qs_res$stars, ";\nBaseline = ", 
                      sprintf("%.3f", round(qs_res$baseline, 3)), " (", 
                      sprintf("%.3f", round(qs_res$baseline_SE, 3)), ")")
qs_res$caption <- factor(qs_res$caption, levels = rev(qs_res$caption))
# Fig 4. Effect of being awarded the NSF Graduate Research Fellowship on political donation
qs_res$group <- factor(qs_res$group, levels = unique(qs_res$group)[c(1,2,3,5,4)])
ggplot(data = qs_res[qs_res$group != "Donation Behavorial Measure",], 
       aes(x = caption, y = est, ymin = qnorm(0.025)*SE + est,
           ymax = qnorm(0.975)*SE + est)) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 25),
                   name = "Outcomes") + 
  ylab("Estimated Effects") + 
  geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) + 
  geom_pointrange() + facet_wrap(~group, scales = "free", nrow = 3) +
  coord_flip() + theme_bw()
# Clean up the table
qs_res$est_c <- paste0(round(qs_res$est, 3), " (", round(qs_res$SE, 3), ")")
qs_res$base <- round(qs_res$baseline, 3)
qs_res$pval <- round(qs_res$p_value, 3)
# S1 Tab. Effect of being awarded the NSF Graduate Research Fellowship on political attitudes and behavior
stargazer(qs_res[,c("caption", "est_c", "base", "pval")], digits = NA,
          summary = FALSE, 
          rownames = FALSE)



# Robustness check: no covariates

# Party ID
party_id_nc <- rbind(gen_results(analysis_func(outcome_var = "party_d", covariates = NULL,
                                               my_caption = "Democrat (Includes Lean)", df = qs),
                                 my_caption = "Democrat (Includes Lean)"),
                     gen_results(analysis_func(outcome_var = "party_r", covariates = NULL,
                                               my_caption = "Republican (Includes Lean)", df = qs),
                                 my_caption = "Republican (Includes Lean)"),
                     gen_results(model_object = analysis_func(outcome_var = "ideo", covariates = NULL,
                                                              my_caption = "Political Ideology (1=Very Con., 5= Very Lib.)", 
                                                              df = qs),
                                 my_caption = "Political Ideology (1=Very Con., 5= Very Lib.)"))
party_id_nc$group <- "Political Identity"

# In a group
in_group_nc <- rbind(
  gen_results(analysis_func(outcome_var = "group_b", covariates = NULL,
                            my_caption = "In a Scientific Organization", 
                            df = qs), "In a Scientific Organization"),
  gen_results(analysis_func(outcome_var = "group_p", covariates = NULL,
                            my_caption = "In the Union of Concerned Scientists", 
                            df = qs), "In the Union of Concerned Scientists")
)
in_group_nc$group <- "Group Membership"

# March for Science
science_march_nc <- rbind(
  gen_results(analysis_func(outcome_var = "march1", covariates = NULL,
                            my_caption = "Support for March for Science", df = qs),
              "Support for March for Science (1 = Strongly Oppose, 5 = Strongly Support)"),
  gen_results(analysis_func(outcome_var = "march_p", covariates = NULL,
                            my_caption = "Participated in March for Science", df = qs),
              "Participated in March for Science")
)
science_march_nc$group = "March for Science"

# Communication
communicate_nc <- rbind(
  gen_results(analysis_func(outcome_var = "comm1", covariates = NULL,
                            my_caption = "Communicate with Policy Makers (1 = Never; 4 = Often)", 
                            df = qs), "Communicate with Policy Makers (1 = Never; 4 = Often)"),
  gen_results(analysis_func(outcome_var = "comm2", covariates = NULL,
                            my_caption = "Communicate with Reporters (1 = Never; 4 = Often)", df = qs),
              "Communicate with Reporters (1 = Never; 4 = Often)")
)
communicate_nc$group <- "Communicate Research Findings"

# Federal funding

fed_funding_nc <- rbind(
  gen_results(analysis_func(outcome_var = "fedfund", covariates = NULL,  
                            my_caption = "Federal Funding for Science", df = qs),
              "Federal Funding for Science (1 = Decrease; 3 = Increase)"),
  gen_results(analysis_func(outcome_var = "nsf1", covariates = NULL, 
                            my_caption = "Funding for NSF (1 = Decrease; 3 = Increase)", df = qs),
              "Funding for NSF (1 = Decrease; 3 = Increase)"),
  gen_results(analysis_func(outcome_var = "nsf2", covariates = NULL,
                            my_caption = "Funding for NSF GRFP (1 = Decrease; 3 = Increase)", df = qs),
              "Funding for NSF GRFP (1 = Decrease; 3 = Increase)")
)

fed_funding_nc$group <- "Attitudes Towards Government Funding for Science"

# Donation
donate_game_nc <- rbind(gen_results(analysis_func(outcome_var = "donation", covariates = NULL,
                                                  my_caption = "Donation Dollar Amount (in USD)", df = qs),
                                    my_caption = "Donation Dollar Amount"),
                        gen_results(analysis_func(outcome_var = "donation_p_n", covariates = NULL,
                                                  my_caption = "Amount Donated to Organizations with Political Agenda (in USD)",
                                                  df = qs), my_caption = "Amount Donated to Organizations with Political Agenda (in USD)"))
donate_game_nc$group <- "Donation Behavorial Measure"

# Combine results
qs_res_nc <- rbind(science_march_nc, fed_funding_nc, 
                   communicate_nc, donate_game_nc, party_id_nc)

# Clean up results for the table
qs_res_nc$est_c <- paste0(round(qs_res_nc$est, 3), " (", round(qs_res_nc$SE, 3), ")")
qs_res_nc$base <- round(qs_res_nc$baseline, 3)
qs_res_nc$pval <- round(qs_res_nc$p_value, 3)

# S1 Tab 2. Effect of being awarded the NSF Graduate Research Fellowship on political attitudes and behavior; not including background covariates
stargazer(qs_res_nc[,c("caption", "est_c", "base", "pval")], digits = NA,
          summary = FALSE, 
          rownames = FALSE)

# Interaction effect: treatment interacted with decision year
# Note: This analysis will not be possible with the publicly available data. 
# To protect subjects' privacy, we have anonymized the application year variable.
# Please contact us and we can run the analysis for you.
# survey_outcomes <- c("march1", "march_p", "fedfund", "nsf1", "nsf2", "comm1", "comm2",
#                        "donation", "donation_p_n", "party_d", "party_r", "ideo")
# survey_int <- do.call(rbind, lapply(X = survey_outcomes, int_var_name = "year.x",
#                                     analysis_func_int, covariates = qs_cov, df = qs))
# survey_int$caption <- qs_res$caption 
# survey_int$est_text <- paste0(round(survey_int$Coef, 3), " (", round(survey_int$SE, 3), ")")
# S1 Tab 13. Interaction effects of award and year of application o political attitudes and bahvior 
# stargazer(survey_int[,c("caption", "est_text", "p")], digits = 3,
#           summary = FALSE, covariate.labels = c("Outcome", "Interaction Effect Estimate (SE)", "p-value"),
#           rownames = FALSE, title = "Interaction Effect of Award and Year of Application on Political Attitudes and Behavior ($N=408$)")


######################################################################
# Survey experiment results

# Experiment 1

# Clean up the conditions
qs$condition_trump <- 0
qs$condition_trump[qs$condition == "1"] <- 0
qs$condition_trump[qs$condition == "2"] <- 1
qs$condition_rep <- 0
qs$condition_rep[qs$condition == "1"] <- 0
qs$condition_rep[qs$condition == "3"] <- 1

# Run the linear regression
# S1 Fig 14. Experiment 1 Results
linear_reg_func(my_formula = as.formula("march1 ~ condition_rep + condition_trump"), 
                my_data = qs, 
                coef_order = 3:1, weights = rep(1, nrow(qs)),
                coef_names = c("Trump Cue Treatment", "Republican Cue Treatment",
                               "(Intercept: Mean Outcome for Neutral Condition)",
                               "N", "F-statistic"),
                my_caption = "Experiment 1 Results: March for Science Support as a Function of Exposure to Partisan Cue Conditions")

## Interaction effect: Trump frame x NSF Award
summary(lm_robust(formula = march1 ~ condition_trump + award + award:condition_trump, 
                  data = qs[qs$condition %in% c("1", "2"),]))
summary(lm_robust(formula = march1 ~ condition_rep + award + award:condition_rep, 
                  data = qs[qs$condition %in% c("1", "3"),]))

# Experiment 2

# Create group names
policy_groups <- c("Carbon Tax + Support Candidate", 
                   "Science Only", "Carbon Tax")
qs$policy_t <- NA
for (i in 1:3) {
  qs$policy_t[qs$political == unique(qs$political)[i]] <-
    policy_groups[i]
}
qs$discipline_t <- NA
discipline <- c("Atmospheric Chemistry", "Economics")
for (i in 1:2) {
  qs$discipline_t[qs$discipline == unique(qs$discipline)[i]] <-
    discipline[i]
}

# Treatment: Discpline = Economics
# Clean up the data
qs$comfort_clean <- 6-qs$comfort
qs$pd_group <- paste0(qs$discipline_t, " -\n", qs$policy_t)
# Generate the summar statistics
comfort <- qs %>% group_by(pd_group) %>% dplyr::summarise(
  mean = mean(comfort_clean),
  se = sqrt(var(comfort_clean)/(n()-1))
)
# Clean up the outputs
comfort$text <- paste0(sprintf("%.3f", round(comfort$mean, 3)), 
                       " (", sprintf("%.3f", round(comfort$se, 3)), ")")
# S1 Fig 2: Experiment 2 Results
ggplot(comfort, aes(x = pd_group, y = mean, ymin = qnorm(0.025)*se + mean,
                    ymax = qnorm(0.975)*se + mean)) +
  geom_pointrange() + geom_text(aes(label = text), nudge_x = 0.3) +
  coord_flip() +
  scale_x_discrete(name = "Experimental Groups") + 
  scale_y_continuous(name = "Comfort with Political Advocacy\n(1 = Very Comfortable; 5 = Very Uncomfortable)", limits = c(3, 5)) +
  theme_bw()

qs$policy_t <- factor(qs$policy_t, levels = c("Science Only", "Carbon Tax",
                                              "Carbon Tax + Support Candidate"))
# Regression analysis
summary(lm_robust(formula = comfort_clean ~ discipline_t + policy_t + discipline_t:policy_t, data = qs))
# Interaction effect: Discpline = Economics x NSF Award 
summary(lm_robust(formula = comfort ~ discipline + award + award:condition_rep, data = qs))

